{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "id": "KgNWMAfrWz8r",
    "colab_type": "code",
    "colab": {}
   },
   "outputs": [],
   "source": [
    "# http://pytorch.org/\n",
    "from os.path import exists\n",
    "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag\n",
    "platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())\n",
    "cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\\.\\([0-9]*\\)\\.\\([0-9]*\\)$/cu\\1\\2/'\n",
    "accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "dzhILRnNVS-a",
    "colab_type": "code",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 125.0
    },
    "outputId": "d29a4372-2558-4dad-8417-3469ccdd0ce0"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 1000 cost = 0.709715\n",
      "Epoch: 2000 cost = 0.593492\n",
      "Epoch: 3000 cost = 0.156285\n",
      "Epoch: 4000 cost = 0.092555\n",
      "Epoch: 5000 cost = 0.076225\n",
      "sorry hate you is Bad Mean...\n"
     ]
    }
   ],
   "source": [
    "'''\n",
    "  code by Tae Hwan Jung(Jeff Jung) @graykode\n",
    "'''\n",
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from torch.autograd import Variable\n",
    "import torch.nn.functional as F\n",
    "\n",
    "dtype = torch.FloatTensor\n",
    "\n",
    "# Text-CNN Parameter\n",
    "embedding_size = 2 # n-gram\n",
    "sequence_length = 3\n",
    "num_classes = 2  # 0 or 1\n",
    "filter_sizes = [2, 2, 2] # n-gram window\n",
    "num_filters = 3\n",
    "\n",
    "# 3 words sentences (=sequence_length is 3)\n",
    "sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n",
    "labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n",
    "\n",
    "word_list = \" \".join(sentences).split()\n",
    "word_list = list(set(word_list))\n",
    "word_dict = {w: i for i, w in enumerate(word_list)}\n",
    "vocab_size = len(word_dict)\n",
    "\n",
    "inputs = []\n",
    "for sen in sentences:\n",
    "    inputs.append(np.asarray([word_dict[n] for n in sen.split()]))\n",
    "\n",
    "targets = []\n",
    "for out in labels:\n",
    "    targets.append(out) # To using Torch Softmax Loss function\n",
    "\n",
    "input_batch = Variable(torch.LongTensor(inputs))\n",
    "target_batch = Variable(torch.LongTensor(targets))\n",
    "\n",
    "\n",
    "class TextCNN(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(TextCNN, self).__init__()\n",
    "\n",
    "        self.num_filters_total = num_filters * len(filter_sizes)\n",
    "        self.W = nn.Parameter(torch.empty(vocab_size, embedding_size).uniform_(-1, 1)).type(dtype)\n",
    "        self.Weight = nn.Parameter(torch.empty(self.num_filters_total, num_classes).uniform_(-1, 1)).type(dtype)\n",
    "        self.Bias = nn.Parameter(0.1 * torch.ones([num_classes])).type(dtype)\n",
    "\n",
    "    def forward(self, X):\n",
    "        embedded_chars = self.W[X] # [batch_size, sequence_length, sequence_length]\n",
    "        embedded_chars = embedded_chars.unsqueeze(1) # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]\n",
    "\n",
    "        pooled_outputs = []\n",
    "        for filter_size in filter_sizes:\n",
    "            # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option]\n",
    "            conv = nn.Conv2d(1, num_filters, (filter_size, embedding_size), bias=True)(embedded_chars)\n",
    "            h = F.relu(conv)\n",
    "            # mp : ((filter_height, filter_width))\n",
    "            mp = nn.MaxPool2d((sequence_length - filter_size + 1, 1))\n",
    "            # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]\n",
    "            pooled = mp(h).permute(0, 3, 2, 1)\n",
    "            pooled_outputs.append(pooled)\n",
    "\n",
    "        h_pool = torch.cat(pooled_outputs, len(filter_sizes)) # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3]\n",
    "        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) # [batch_size(=6), output_height * output_width * (output_channel * 3)]\n",
    "\n",
    "        model = torch.mm(h_pool_flat, self.Weight) + self.Bias # [batch_size, num_classes]\n",
    "        return model\n",
    "\n",
    "model = TextCNN()\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# Training\n",
    "for epoch in range(5000):\n",
    "    optimizer.zero_grad()\n",
    "    output = model(input_batch)\n",
    "\n",
    "    # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)\n",
    "    loss = criterion(output, target_batch)\n",
    "    if (epoch + 1) % 1000 == 0:\n",
    "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
    "\n",
    "    loss.backward()\n",
    "    optimizer.step()\n",
    "\n",
    "# Test\n",
    "test_text = 'sorry hate you'\n",
    "tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n",
    "test_batch = Variable(torch.LongTensor(tests))\n",
    "\n",
    "# Predict\n",
    "predict = model(test_batch).data.max(1, keepdim=True)[1]\n",
    "if predict[0][0] == 0:\n",
    "    print(test_text,\"is Bad Mean...\")\n",
    "else:\n",
    "    print(test_text,\"is Good Mean!!\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "TextCNN-Torch.ipynb",
   "version": "0.3.2",
   "provenance": [],
   "collapsed_sections": []
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "accelerator": "GPU"
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
